%{
AUTHOR: Felipe Arteaga
-------------------------------------------------------------------------
PROJECT: Warnings
-------------------------------------------------------------------------
DESCRIPTION:
=========================================================================
%}


clearvars -except projectDir projectDirData fromMainWarningsPaper
clc;close all;fclose('all');feature('DefaultCharacterSet','UTF-8');

if(not(exist('projectDir','var')==1&&exist('projectDirData','var')==1&&exist('fromMainWarningsPaper','var')==1&&fromMainWarningsPaper))
    pcName=char(java.lang.System.getProperty('user.name'));
    if(strcmp(pcName,'felipe'))
        % PC Felipe
        myDir='/Users/felipe/Dropbox/';
        projectDir=[myDir,'git/warnings/'];
        projectDirData=[myDir,'projects/warnings/'];
        addpath(genpath([myDir,'/myMatlabFunctions/']));
    end
end



dirPlots=[projectDir,'/paper/figuresCL/other/'];
dirData=[projectDirData,'/data/chile/'];



%% Load data:
anho=0; %0: pooled
if(anho==0)
    anhoStr='';
else
    anhoStr=sprintf('%i',anho);
end



if(anho==0)
    
    d1=load([dirData,'2018/inputRD'],'dataRD');
    d2=load([dirData,'2019/inputRD'],'dataRD');
    d3=load([dirData,'2020/inputRD'],'dataRD');
    % Variables that are not available for 2020 yet:
    %fillWithNan={'enrolledInAssigned','enrolledInAddedIniEnd','enrolledInAddedNoWaitlistIniEnd'};
    fillWithNan={};
    %fillWithNan={'assignedToPref','acceptOffer','declineOffer','preferenciaAsign_1','placedInAnyPreferenceIniEnd','placedInAddedIniEnd','placedInAddedNoWaitlistIniEnd','placedInOldIniEnd','enrolledInAssigned','enrolledInAddedIniEnd','enrolledInAddedNoWaitlistIniEnd'};
    for v=1:length(fillWithNan)
        d3.dataRD.(fillWithNan{v})=nan(height(d3.dataRD),1);
    end
    
    
    % Universe that is comparable between 2018-2020
    entryGrades=[-1 0 1 7 9];
    sirven1=ismember(d1.dataRD.grade,entryGrades)&not(d1.dataRD.cod_reg==13);
    sirven2=ismember(d2.dataRD.grade,entryGrades)&not(d2.dataRD.cod_reg==13);
    sirven3=ismember(d3.dataRD.grade,entryGrades)&not(d3.dataRD.cod_reg==13);
    
    d1.dataRD.comparable=sirven1;
    d2.dataRD.comparable=sirven2;
    d3.dataRD.comparable=sirven3;
    
    d1.dataRD.anho=2018*ones(height(d1.dataRD),1);
    d2.dataRD.anho=2019*ones(height(d2.dataRD),1);
    d3.dataRD.anho=2020*ones(height(d3.dataRD),1);
    
    % Keep vars in common
    
    incommon=intersect(intersect(d1.dataRD.Properties.VariableNames,d2.dataRD.Properties.VariableNames),d3.dataRD.Properties.VariableNames);
    incommon=incommon(not(strcmp(incommon,'mrun')));
    dataRD=[d1.dataRD(:,incommon);d2.dataRD(:,incommon);d3.dataRD(:,incommon)];
    
    
    
else
    load([dirData,anhoStr,'/inputRD'],'dataRD')
end

dataRD.rural=dataRD.newMarket<0;

% As asignment at .3 is control, just move (if any) in .3 to epsilon to the
% left:
dataRD.riskPopup(dataRD.riskPopup==.3)=.29999999;



dataRD.Properties.VariableDescriptions{'riskPopup'}='Predicted placement risk';
dataRD.Properties.VariableDescriptions{'treatedPopup'}='Treated pop-up in first attempt';


% Load survey data
survey=readtable([dirData,'2020/dataEncuestaMail.csv']);
assert(all((ismember(survey.id_postulante,dataRD.id_postulante))))


% Merge survey data:
survey.withSurvey=true(height(survey),1);
dataRD=outerjoin(dataRD,survey,'keys',{'id_postulante'},'mergeKeys',true,'type','left');





%% Histogram predicted risk (to check if there are discontinuities)
% Only initial risk
compareHistograms(dataRD.riskPopup(dataRD.pobPopup&dataRD.riskPopup>0.01),'binedges',0:0.05:1,'withMean',false)
xlabel('Predicted placement risk')
yLim=ylim;
hold on
plot([.3 .3],[0 yLim(2)],'linestyle','--','color',.5*[1 1 1]);

ht = text([.32],.15,'Risky threshold','Interpreter','latex');
set(ht,'Rotation',90)
easyExport([dirPlots,sprintf('%s_P_%s_%s',anhoStr,'riskPopupDistribution','all')],'displayLatex',true,'caption','Distribution of predicted risk (only >0)','includeRelativePath',1);


% 1) Prob that you accept on rank
% 2) Prob of accepting on satisfaction
% 3) Prob of enroll on rank
% 4) Prob of enroll on satisfaction
% 5) Prob of enroll given the level of knowledge (diff between obligated and not obligated)

%% Prob

dataRD.prefAsign=dataRD.preferenciaAsign_1;
dataRD.prefAsign(dataRD.prefAsign>5)=5;
dataRD.prefAsign=categorical(dataRD.prefAsign);
dataRD.prefAsign=renamecats(dataRD.prefAsign,'5','5+');


knowledgePlacement=scalarForTable({''},dataRD);
for p=1:5
    knowledgePlacement(dataRD.preferenciaAsign_1==p)=dataRD.(sprintf('knowledgeApp%i',p))(dataRD.preferenciaAsign_1==p);
end
dataRD.knowledgePlacement=categorical(knowledgePlacement);


% Prob of decline by rank
figure
sirven=dataRD.assignedToPref==1; % Ojo que esto no incluye a los devueltos
barmean(dataRD(sirven,:),'prefAsign','declineOffer','mps',0.01,'all',0,'pdiff',0)
title('Fraction decline offer')
xlabel('Preference of placement')
easyExport()

figure
sirven=dataRD.assignedToPref==1&dataRD.lengthEnd==5;
barmean(dataRD(sirven,:),'prefAsign','declineOffer','mps',0.01,'all',0,'pdiff',0)
title('Fraction decline offer conditional on applying to 5')
xlabel('Preference of placement')
easyExport()

% Prob of enrolled in assigned by rank

sirven=dataRD.assignedToPref==1; % Ojo que esto no incluye a los devueltos
barmean(dataRD(sirven,:),'prefAsign','enrolledInAssigned','mps',0.01,'all',0,'pdiff',0)
%title('Fraction enroll in placed school')
ylabel('Enrolled in placed')
xlabel('Preference of placement')
easyExport([dirPlots,'EnrolledVsPrefPlaced'])


sirven=dataRD.assignedToPref==1&dataRD.lengthEnd==5;
barmean(dataRD(sirven,:),'prefAsign','enrolledInAssigned','mps',0.01,'all',0,'pdiff',0)
title('Fraction enroll in placed school conditional on applying to 5')
xlabel('Preference of placement')
easyExport()

% Prob of decline assigned by satisfaction

figure
sirven1=dataRD.assignedToPref==1&dataRD.withSurvey&dataRD.preferenciaAsign_1==1;
sirven2=dataRD.assignedToPref==1&dataRD.withSurvey&dataRD.preferenciaAsign_1==dataRD.lengthEnd&dataRD.lengthEnd>1;

dataRD.newSatis_1=categorical(dataRD.satisf_1,7:-1:1);
dataRD.newSatis_1=renamecats(dataRD.newSatis_1,'7','7 (highest)');
dataRD.newSatis_1=renamecats(dataRD.newSatis_1,'1','1 (lowest)');

dataRD.newSatis_last=categorical(dataRD.satisf_last,7:-1:1);
dataRD.newSatis_last=renamecats(dataRD.newSatis_last,'7','7 (highest)');
dataRD.newSatis_last=renamecats(dataRD.newSatis_last,'1','1 (lowest)');

dataRD.newSatis=scalarForTable(categorical(0,1,'1'),dataRD);

dataRD.newSatis(sirven1)=dataRD.newSatis_1(sirven1);
dataRD.newSatis(sirven2)=dataRD.newSatis_last(sirven2);

dataRD.notDecline=1-dataRD.declineOffer;

barmean(dataRD(sirven1|sirven2,:),'newSatis','notDecline','mps',0.01,'all',1,'pdiff',0)
%title('Fraction don''t decline offer conditional on placed in 1st or last opt')
xlabel('Satisfaction with hypothetical placement (before placement is known)')
ylabel('Do not decline offer')
easyExport([dirPlots,'NotDeclineVsSatisfaction'])

figure
barmean(dataRD(sirven1|sirven2,:),'newSatis','enrolledInAssigned','mps',0.01,'all',1,'pdiff',0)
ylabel('Enrolled in placed')
xlabel('Satisfaction with hypothetical placement (before placement is known)')
easyExport([dirPlots,'EnrollVsSatisfaction'])


% Enroll by satisfaction
figure
sirven=dataRD.withSurvey;
barmean(dataRD(sirven,:),'satisf_last','declineOffer','mps',0.01,'all',1,'pdiff',0)
title('Fraction decline offer conditional on placed in last option')
xlabel('Satisfaction with hypothetical placement on last (before placement is known)')
easyExport() 



% Decline depending on knowledge
figure
sirven=dataRD.assignedToPref==1&dataRD.withSurvey;
barmean(dataRD(sirven,:),'knowledgePlacement','declineOffer','mps',0.01,'all',1,'pdiff',0)
title('Fraction decline offer')
xlabel('Knowledge of school')
easyExport()


% Decline depending on knowledge
figure
sirven=dataRD.assignedToPref==1&dataRD.withSurvey&dataRD.preferenciaAsign_1==1;
barmean(dataRD(sirven,:),'knowledgePlacement','declineOffer','mps',0.01,'all',1,'pdiff',0)
title('Fraction decline offer conditional on placed in 1st')
xlabel('Knowledge of 1st option')
easyExport()



% Decline depending on knowledge
figure
sirven=dataRD.withSurvey;
barmean(dataRD(sirven,:),'knowledgeApp1','satisf_1','mps',0.01,'all',1,'pdiff',0)
title('Mean ex-ante satisfaction conditional on hyp. placement on 1st')
xlabel('Knowledge of 1st option')
easyExport()


%% Histogram of risk

figure
compareHistograms({1-dataRD.riskExpostIni,1-dataRD.riskExpostEnd},'labels',{'First Attempt','Last Attempt'})
xlabel('True placement probability')
easyExport([dirPlots,sprintf('%s_P_%s_%s',anhoStr,'riskDistribution','all')],'displayLatex',true,'caption','Placement probability distribution','includeRelativePath',1);


% Riskies
figure
compareHistograms({1-dataRD.riskExpostIni(dataRD.riskExpostEnd>0.01),1-dataRD.riskExpostEnd(dataRD.riskExpostEnd>0.01)},'labels',{'First Attempt','Last Attempt'})
xlabel('True placement probability')
easyExport([dirPlots,sprintf('%s_P_%s_%s',anhoStr,'riskDistribution','riskies')],'displayLatex',true,'caption','Placement probability distribution (Only Risk>0)','includeRelativePath',1);

%%
% Only initial risk
compareHistograms({1-dataRD.riskExpostIni})
xlabel('True placement probability')
easyExport([dirPlots,sprintf('%s_P_%s_%s',anhoStr,'riskDistribution1stAttemp','all')],'displayLatex',true,'caption','Placement probability distribution (Only Risk>0)','includeRelativePath',1);

compareHistograms({1-dataRD.riskExpostIni(dataRD.riskExpostIni>.01)},'wm',0)
hold on
xlabel('True placement probability - 1st attempt')
arrowr = annotation('arrow') ;
arrowr.Parent = gca;
arrowr.Position = [.7, 1*.25, -.1, 0] ;
hold on
plot([.7 .7],[0 1],'linestyle','--','color',.5*[1 1 1]);


annotation2('textbox',[.7, 1*.2],'w','String','"Risky"','Interpreter','latex','edgecolor','none')

easyExport([dirPlots,sprintf('%s_P_%s_%s',anhoStr,'riskDistribution1stAttemp','riskies')],'displayLatex',true,'caption','Placement probability distribution (Only Risk>0)','includeRelativePath',1);



%% Mini table of % of risky per year

if(anho==0)
    dataRD.risky=dataRD.riskExpostIni>.3;
    dataRD.N=ones(height(dataRD),1);
    data=stataCollapse({'anho'},dataRD,{'risky','N'},{'mean','sum'});
    
    cellImprimir=mat2cellstr([data.risky*100,data.N]','cr',1,'dp','%.1f');
    opts=struct;
    opts.titulo='';
    opts.label='';
    opts.header=mat2cellstr(data.anho,'wts',0)';
    opts.columnasFantasma=[];
    opts.filasFantasma=[];
    opts.primeraColumna={'Risky [%%]','Applicants'}';
    %opts.file='';
    cell2latex(cellImprimir,'opts',opts)
    
end

%% Binsreg True Risk vs Popup risk
colors=linspecer(1);
% True risk in x axis: conditional on true risk in (0.01-0.99)
figure

condition=dataRD.pobPopup==1;
condition2=condition&dataRD.riskExpostIni>0.01;
edges=[quantile(1-dataRD.riskExpostIni(condition2),10) 0.99 1];
assert(sum(edges==0)==1)
preB=discretize(1-dataRD.riskExpostIni(condition),edges,'includedEdge','left');

br1=binsreg(1-dataRD.riskExpostIni(condition),1-dataRD.riskPopup(condition),'markeredgecolor',colors(1,:),'fitcolor',colors(1,:),'areacolor',colors(1,:),'modifyXticks', false,'wlf',true,'poscoefffit',[.65 .5 .1 .1],'modifyXTicks',false,'binStrategy','custom','preB',preB,'wiq',true);
xlabel('True placement probability')
ylabel('Predicted placement probability')

hold on
aux=plot([0 1],[0 1],':','linewidth',1,'color','k');
legend([br1.scatter br1.linearFitPlot aux br1.quantileArea],{'Bin mean','Linear fit','$45^{\circ}$','Interquartile range'},'location','southeast','interpreter','latex')
easyExport([dirPlots,sprintf('%s_P_%s_%s',anhoStr,'binsregPredVsTrueRisk','all')],'displayLatex',false,'caption','Binscatter predicted vs true placement probability');


% Predicted risk in x axis: conditional on predicted risk in (0.01-0.99)
figure
condition=dataRD.pobPopup==1&dataRD.riskPopup>0.01&dataRD.riskPopup<0.99;
%condition=dataRD.pobPopup==1;
b=binsreg(1-dataRD.riskPopup(condition),1-dataRD.riskExpostIni(condition),'modifyXticks', false,'nb',20,'wlf',true,'poscoefffit',[.65 .3 .1 .1]);
ylabel('True placement probability')
xlabel('Predicted placement probability')
hold on
aux=plot([0 1],[0 1],':','linewidth',1,'color','k');
legend([b.linearFitPlot aux],{'Linear fit','$45^{\circ}$'},'location','southeast','Interpreter','latex')
easyExport([dirPlots,sprintf('%s_P_%s_%s',anhoStr,'binsregTrueVsPredRisk','all')],'displayLatex',false,'caption','Binscatter true vs predicted placement probability');


%% Binsreg True Risk  vs placement


figure


condition=scalarForTable(true,dataRD);
condition2=condition&dataRD.riskExpostEnd>0.01;
edges=[quantile(1-dataRD.riskExpostEnd(condition2),10) 0.99 1];
assert(sum(edges==0)==1)
preB=discretize(1-dataRD.riskExpostEnd(condition),edges,'includedEdge','left');


b=binsreg(1-dataRD.riskExpostEnd(condition),dataRD.assignedToPref(condition),'modifyXticks', false,'nb',20,'wlf',true,'poscoefffit',[.15 .7 .1 .1],'preB',preB,'binstrategy','custom');
xlabel('True placement probability')
ylabel('Placed')
hold on
aux=plot([0 1],[0 1],':','linewidth',1,'color','k');
legend([b.linearFitPlot aux],{'Linear fit','$45^{\circ}$'},'location','southeast','Interpreter','latex')
easyExport([dirPlots,sprintf('%s_P_%s_%s',anhoStr,'binsregPlacedVsTrueRisk','all')],'displayLatex',false,'caption','Binscatter placed vs true placement probability');

%% Binsreg Pred Risk last attempt  vs placement
figure
% OJO: this is predicted risk for LAST attempt

%%%%%%

condition=dataRD.pobPopup==1;
condition2=condition&dataRD.riskApiEnd>0.01;
edges=[quantile(1-dataRD.riskApiEnd(condition2),10) 0.99 1];
assert(sum(edges==0)==1)
preB=discretize(1-dataRD.riskApiEnd(condition),edges,'includedEdge','left');


b=binsreg(1-dataRD.riskApiEnd(condition),dataRD.assignedToPref(condition),'markeredgecolor',colors(1,:),'fitcolor',colors(1,:),'modifyXticks', false,'nb',20,'wlf',true,'poscoefffit',[.65 .5 .1 .1],'modifyXTicks',false,'preB',preB,'binstrategy','custom');
xlabel('Predicted placement probability')
ylabel('Placed')


hold on
aux=plot([0 1],[0 1],':','linewidth',1,'color','k');
legend([br1.scatter b.linearFitPlot aux],{'Bin mean','Linear fit','$45^{\circ}$'},'location','southeast','Interpreter','latex')
easyExport([dirPlots,sprintf('%s_P_%s_%s',anhoStr,'binsregPlacedVsPredRisk','all')],'displayLatex',false,'caption','Binscatter placed vs predicted placement probability');





